!pip install pyforest
Defaulting to user installation because normal site-packages is not writeable Requirement already satisfied: pyforest in c:\users\admin\appdata\roaming\python\python311\site-packages (1.1.0)
df = pd.read_excel("IPL.xlsx")
df
| Sl.NO. | PLAYER NAME | AGE | COUNTRY | TEAM | PLAYING ROLE | T-RUNS | T-WKTS | ODI-RUNS-S | ODI-SR-B | ODI-WKTS | ODI-SR-BL | CAPTAINCY EXP | RUNS-S | HS | AVE | SR-B | SIXERS | RUNS-C | WKTS | AVE-BL | ECON | SR-BL | AUCTION YEAR | BASE PRICE | SOLD PRICE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Abdulla, YA | 2 | SA | KXIP | Allrounder | 0 | 0 | 0 | 0.00 | 0 | 0.0 | 0 | 0 | 0 | 0.00 | 0.00 | 0 | 307 | 15 | 20.47 | 8.90 | 13.93 | 2009 | 50000 | 50000 |
| 1 | 2 | Abdur Razzak | 2 | BAN | RCB | Bowler | 214 | 18 | 657 | 71.41 | 185 | 37.6 | 0 | 0 | 0 | 0.00 | 0.00 | 0 | 29 | 0 | 0.00 | 14.50 | 0.00 | 2008 | 50000 | 50000 |
| 2 | 3 | Agarkar, AB | 2 | IND | KKR | Bowler | 571 | 58 | 1269 | 80.62 | 288 | 32.9 | 0 | 167 | 39 | 18.56 | 121.01 | 5 | 1059 | 29 | 36.52 | 8.81 | 24.90 | 2008 | 200000 | 350000 |
| 3 | 4 | Ashwin, R | 1 | IND | CSK | Bowler | 284 | 31 | 241 | 84.56 | 51 | 36.8 | 0 | 58 | 11 | 5.80 | 76.32 | 0 | 1125 | 49 | 22.96 | 6.23 | 22.14 | 2011 | 100000 | 850000 |
| 4 | 5 | Badrinath, S | 2 | IND | CSK | Batsman | 63 | 0 | 79 | 45.93 | 0 | 0.0 | 0 | 1317 | 71 | 32.93 | 120.71 | 28 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2011 | 100000 | 800000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 125 | 126 | Yadav, AS | 2 | IND | DC | Batsman | 0 | 0 | 0 | 0.00 | 0 | 0.0 | 0 | 49 | 16 | 9.80 | 125.64 | 2 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2010 | 50000 | 750000 |
| 126 | 127 | Younis Khan | 2 | PAK | RR | Batsman | 6398 | 7 | 6814 | 75.78 | 3 | 86.6 | 1 | 3 | 3 | 3.00 | 42.85 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2008 | 225000 | 225000 |
| 127 | 128 | Yuvraj Singh | 2 | IND | KXIP+ | Batsman | 1775 | 9 | 8051 | 87.58 | 109 | 44.3 | 1 | 1237 | 66 | 26.32 | 131.88 | 67 | 569 | 23 | 24.74 | 7.02 | 21.13 | 2011 | 400000 | 1800000 |
| 128 | 129 | Zaheer Khan | 2 | IND | MI+ | Bowler | 1114 | 288 | 790 | 73.55 | 278 | 35.4 | 0 | 99 | 23 | 9.90 | 91.67 | 1 | 1783 | 65 | 27.43 | 7.75 | 21.26 | 2008 | 200000 | 450000 |
| 129 | 130 | Zoysa, DNT | 2 | SL | DC | Bowler | 288 | 64 | 343 | 95.81 | 108 | 39.4 | 0 | 11 | 10 | 11.00 | 122.22 | 0 | 99 | 2 | 49.50 | 9.00 | 33.00 | 2008 | 100000 | 110000 |
130 rows × 26 columns
df.describe(include='all').T
| count | unique | top | freq | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Sl.NO. | 130.0 | NaN | NaN | NaN | 65.5 | 37.671829 | 1.0 | 33.25 | 65.5 | 97.75 | 130.0 |
| PLAYER NAME | 130 | 130 | Abdulla, YA | 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| AGE | 130.0 | NaN | NaN | NaN | 2.092308 | 0.576627 | 1.0 | 2.0 | 2.0 | 2.0 | 3.0 |
| COUNTRY | 130 | 10 | IND | 53 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| TEAM | 130 | 17 | CSK | 14 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| PLAYING ROLE | 130 | 4 | Bowler | 44 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| T-RUNS | 130.0 | NaN | NaN | NaN | 2166.715385 | 3305.646757 | 0.0 | 25.5 | 542.5 | 3002.25 | 15470.0 |
| T-WKTS | 130.0 | NaN | NaN | NaN | 66.530769 | 142.676855 | 0.0 | 0.0 | 7.0 | 47.5 | 800.0 |
| ODI-RUNS-S | 130.0 | NaN | NaN | NaN | 2508.738462 | 3582.205625 | 0.0 | 73.25 | 835.0 | 3523.5 | 18426.0 |
| ODI-SR-B | 130.0 | NaN | NaN | NaN | 71.164385 | 25.89844 | 0.0 | 65.65 | 78.225 | 86.79 | 116.66 |
| ODI-WKTS | 130.0 | NaN | NaN | NaN | 76.076923 | 111.20507 | 0.0 | 0.0 | 18.5 | 106.0 | 534.0 |
| ODI-SR-BL | 130.0 | NaN | NaN | NaN | 34.033846 | 26.751749 | 0.0 | 0.0 | 36.6 | 45.325 | 150.0 |
| CAPTAINCY EXP | 130.0 | NaN | NaN | NaN | 0.315385 | 0.466466 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 |
| RUNS-S | 130.0 | NaN | NaN | NaN | 514.246154 | 615.226335 | 0.0 | 39.0 | 172.0 | 925.25 | 2254.0 |
| HS | 130.0 | NaN | NaN | NaN | 47.430769 | 36.403624 | 0.0 | 16.0 | 35.5 | 73.75 | 158.0 |
| AVE | 130.0 | NaN | NaN | NaN | 18.719308 | 11.094224 | 0.0 | 9.825 | 18.635 | 27.8725 | 50.11 |
| SR-B | 130.0 | NaN | NaN | NaN | 111.053462 | 35.928907 | 0.0 | 98.2375 | 118.51 | 129.1025 | 235.49 |
| SIXERS | 130.0 | NaN | NaN | NaN | 17.692308 | 23.828146 | 0.0 | 1.0 | 6.0 | 29.75 | 129.0 |
| RUNS-C | 130.0 | NaN | NaN | NaN | 475.523077 | 558.314049 | 0.0 | 0.0 | 297.0 | 689.25 | 1975.0 |
| WKTS | 130.0 | NaN | NaN | NaN | 17.169231 | 21.816763 | 0.0 | 0.0 | 8.5 | 23.75 | 83.0 |
| AVE-BL | 130.0 | NaN | NaN | NaN | 23.110231 | 20.802057 | 0.0 | 0.0 | 24.785 | 35.58 | 126.3 |
| ECON | 130.0 | NaN | NaN | NaN | 6.204462 | 4.941531 | 0.0 | 0.0 | 7.38 | 8.2475 | 38.11 |
| SR-BL | 130.0 | NaN | NaN | NaN | 17.382615 | 15.273422 | 0.0 | 0.0 | 19.935 | 26.2125 | 100.2 |
| AUCTION YEAR | 130.0 | NaN | NaN | NaN | 2009.092308 | 1.377821 | 2008.0 | 2008.0 | 2008.0 | 2011.0 | 2011.0 |
| BASE PRICE | 130.0 | NaN | NaN | NaN | 192230.769231 | 153097.300897 | 20000.0 | 100000.0 | 200000.0 | 225000.0 | 1350000.0 |
| SOLD PRICE | 130.0 | NaN | NaN | NaN | 521223.076923 | 406807.351419 | 20000.0 | 225000.0 | 437500.0 | 700000.0 | 1800000.0 |
df.dtypes.to_frame()
| 0 | |
|---|---|
| Sl.NO. | int64 |
| PLAYER NAME | object |
| AGE | int64 |
| COUNTRY | object |
| TEAM | object |
| PLAYING ROLE | object |
| T-RUNS | int64 |
| T-WKTS | int64 |
| ODI-RUNS-S | int64 |
| ODI-SR-B | float64 |
| ODI-WKTS | int64 |
| ODI-SR-BL | float64 |
| CAPTAINCY EXP | int64 |
| RUNS-S | int64 |
| HS | int64 |
| AVE | float64 |
| SR-B | float64 |
| SIXERS | int64 |
| RUNS-C | int64 |
| WKTS | int64 |
| AVE-BL | float64 |
| ECON | float64 |
| SR-BL | float64 |
| AUCTION YEAR | int64 |
| BASE PRICE | int64 |
| SOLD PRICE | int64 |
df.size
3380
df.shape
(130, 26)
df.isnull().sum()
Sl.NO. 0 PLAYER NAME 0 AGE 0 COUNTRY 0 TEAM 0 PLAYING ROLE 0 T-RUNS 0 T-WKTS 0 ODI-RUNS-S 0 ODI-SR-B 0 ODI-WKTS 0 ODI-SR-BL 0 CAPTAINCY EXP 0 RUNS-S 0 HS 0 AVE 0 SR-B 0 SIXERS 0 RUNS-C 0 WKTS 0 AVE-BL 0 ECON 0 SR-BL 0 AUCTION YEAR 0 BASE PRICE 0 SOLD PRICE 0 dtype: int64
import matplotlib.pyplot as plt
import seaborn as sn
plt.figure(figsize=(30,30))
sn.boxplot(data=df)
<Axes: >
sn.boxplot(x="COUNTRY", y="SOLD PRICE", data=df)
<Axes: xlabel='COUNTRY', ylabel='SOLD PRICE'>
bar = sn.countplot(x='AGE' , data = df, hue='PLAYING ROLE', palette='turbo_r', )
for container in bar.containers:
bar.bar_label(container)
plt.show()
count = df['COUNTRY'].value_counts()
count
COUNTRY IND 53 AUS 22 SA 16 SL 12 PAK 9 NZ 7 WI 6 ENG 3 BAN 1 ZIM 1 Name: count, dtype: int64
lable = df['COUNTRY'].value_counts().index
lable
Index(['IND', 'AUS', 'SA', 'SL', 'PAK', 'NZ', 'WI', 'ENG', 'BAN', 'ZIM'], dtype='object', name='COUNTRY')
plt.figure(figsize=(25,25))
plt.pie(count, labels=lable, autopct='%.2f%%', startangle=90 , textprops={'fontsize': 14})
centre_circle=plt.Circle((0,0),0.70, fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
plt.axis('equal')
plt.show()
Q1=df['SOLD PRICE'].quantile(0.25)
Q2=df['SOLD PRICE'].quantile(0.50)
Q3=df['SOLD PRICE'].quantile(0.75)
Q4=df['SOLD PRICE'].quantile(1)
print(Q1)
print(Q2)
print(Q3)
print(Q4)
225000.0 437500.0 700000.0 1800000.0
IQR = Q3-Q1
print(IQR)
475000.0
UW= Q3+(1.5*IQR)
LW = Q1-(1.5*IQR)
print(UW)
print(LW)
1412500.0 -487500.0
out_liers = df[(df["SOLD PRICE"] > UW) | (df["SOLD PRICE"] < LW)]
out_liers
| Sl.NO. | PLAYER NAME | AGE | COUNTRY | TEAM | PLAYING ROLE | T-RUNS | T-WKTS | ODI-RUNS-S | ODI-SR-B | ODI-WKTS | ODI-SR-BL | CAPTAINCY EXP | RUNS-S | HS | AVE | SR-B | SIXERS | RUNS-C | WKTS | AVE-BL | ECON | SR-BL | AUCTION YEAR | BASE PRICE | SOLD PRICE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15 | 16 | Dhoni, MS | 2 | IND | CSK | W. Keeper | 3509 | 0 | 6773 | 88.19 | 1 | 12.0 | 1 | 1782 | 70 | 37.13 | 136.45 | 64 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2008 | 400000 | 1500000 |
| 23 | 24 | Flintoff, A | 2 | ENG | CSK | Allrounder | 3845 | 226 | 3394 | 88.82 | 169 | 33.2 | 1 | 62 | 24 | 31.00 | 116.98 | 2 | 105 | 2 | 52.50 | 9.55 | 33.00 | 2009 | 950000 | 1550000 |
| 50 | 51 | Kohli, V | 1 | IND | RCB | Batsman | 491 | 0 | 3590 | 86.31 | 2 | 137.0 | 1 | 1639 | 73 | 28.26 | 119.29 | 49 | 345 | 4 | 86.25 | 8.84 | 58.50 | 2011 | 150000 | 1800000 |
| 83 | 84 | Pietersen, KP | 2 | ENG | RCB+ | Batsman | 6654 | 5 | 4184 | 86.76 | 7 | 57.1 | 1 | 634 | 103 | 42.27 | 141.20 | 30 | 215 | 7 | 30.71 | 7.41 | 24.86 | 2009 | 1350000 | 1550000 |
| 93 | 94 | Sehwag, V | 2 | IND | DD | Batsman | 8178 | 40 | 8090 | 104.68 | 95 | 45.4 | 1 | 1879 | 119 | 30.31 | 167.32 | 79 | 226 | 6 | 37.67 | 10.56 | 21.67 | 2011 | 400000 | 1800000 |
| 111 | 112 | Tendulkar, SR | 3 | IND | MI | Batsman | 15470 | 45 | 18426 | 86.23 | 154 | 52.2 | 1 | 2047 | 100 | 37.91 | 119.22 | 24 | 58 | 0 | 0.00 | 9.67 | 0.00 | 2011 | 400000 | 1800000 |
| 113 | 114 | Tiwary, SS | 1 | IND | MI+ | Batsman | 0 | 0 | 49 | 87.50 | 0 | 0.0 | 0 | 836 | 42 | 25.33 | 119.60 | 32 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2011 | 100000 | 1600000 |
| 127 | 128 | Yuvraj Singh | 2 | IND | KXIP+ | Batsman | 1775 | 9 | 8051 | 87.58 | 109 | 44.3 | 1 | 1237 | 66 | 26.32 | 131.88 | 67 | 569 | 23 | 24.74 | 7.02 | 21.13 | 2011 | 400000 | 1800000 |
sn.boxplot(df["SOLD PRICE"])
<Axes: >
import plotly.express as p
p.box(df['SOLD PRICE'])
z_scores = stats.zscore(df['SOLD PRICE'])
z_scores
0 -1.162826
1 -1.162826
2 -0.422523
3 0.811315
4 0.687931
...
125 0.564547
126 -0.730982
127 3.155606
128 -0.175755
129 -1.014765
Name: SOLD PRICE, Length: 130, dtype: float64
outliers = df[(z_scores > 3) | (z_scores < -3)]
outliers
| Sl.NO. | PLAYER NAME | AGE | COUNTRY | TEAM | PLAYING ROLE | T-RUNS | T-WKTS | ODI-RUNS-S | ODI-SR-B | ODI-WKTS | ODI-SR-BL | CAPTAINCY EXP | RUNS-S | HS | AVE | SR-B | SIXERS | RUNS-C | WKTS | AVE-BL | ECON | SR-BL | AUCTION YEAR | BASE PRICE | SOLD PRICE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 50 | 51 | Kohli, V | 1 | IND | RCB | Batsman | 491 | 0 | 3590 | 86.31 | 2 | 137.0 | 1 | 1639 | 73 | 28.26 | 119.29 | 49 | 345 | 4 | 86.25 | 8.84 | 58.50 | 2011 | 150000 | 1800000 |
| 93 | 94 | Sehwag, V | 2 | IND | DD | Batsman | 8178 | 40 | 8090 | 104.68 | 95 | 45.4 | 1 | 1879 | 119 | 30.31 | 167.32 | 79 | 226 | 6 | 37.67 | 10.56 | 21.67 | 2011 | 400000 | 1800000 |
| 111 | 112 | Tendulkar, SR | 3 | IND | MI | Batsman | 15470 | 45 | 18426 | 86.23 | 154 | 52.2 | 1 | 2047 | 100 | 37.91 | 119.22 | 24 | 58 | 0 | 0.00 | 9.67 | 0.00 | 2011 | 400000 | 1800000 |
| 127 | 128 | Yuvraj Singh | 2 | IND | KXIP+ | Batsman | 1775 | 9 | 8051 | 87.58 | 109 | 44.3 | 1 | 1237 | 66 | 26.32 | 131.88 | 67 | 569 | 23 | 24.74 | 7.02 | 21.13 | 2011 | 400000 | 1800000 |
cleaned_data = df[(z_scores <=3) & (z_scores>=-3)]
cleaned_data
| Sl.NO. | PLAYER NAME | AGE | COUNTRY | TEAM | PLAYING ROLE | T-RUNS | T-WKTS | ODI-RUNS-S | ODI-SR-B | ODI-WKTS | ODI-SR-BL | CAPTAINCY EXP | RUNS-S | HS | AVE | SR-B | SIXERS | RUNS-C | WKTS | AVE-BL | ECON | SR-BL | AUCTION YEAR | BASE PRICE | SOLD PRICE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Abdulla, YA | 2 | SA | KXIP | Allrounder | 0 | 0 | 0 | 0.00 | 0 | 0.0 | 0 | 0 | 0 | 0.00 | 0.00 | 0 | 307 | 15 | 20.47 | 8.90 | 13.93 | 2009 | 50000 | 50000 |
| 1 | 2 | Abdur Razzak | 2 | BAN | RCB | Bowler | 214 | 18 | 657 | 71.41 | 185 | 37.6 | 0 | 0 | 0 | 0.00 | 0.00 | 0 | 29 | 0 | 0.00 | 14.50 | 0.00 | 2008 | 50000 | 50000 |
| 2 | 3 | Agarkar, AB | 2 | IND | KKR | Bowler | 571 | 58 | 1269 | 80.62 | 288 | 32.9 | 0 | 167 | 39 | 18.56 | 121.01 | 5 | 1059 | 29 | 36.52 | 8.81 | 24.90 | 2008 | 200000 | 350000 |
| 3 | 4 | Ashwin, R | 1 | IND | CSK | Bowler | 284 | 31 | 241 | 84.56 | 51 | 36.8 | 0 | 58 | 11 | 5.80 | 76.32 | 0 | 1125 | 49 | 22.96 | 6.23 | 22.14 | 2011 | 100000 | 850000 |
| 4 | 5 | Badrinath, S | 2 | IND | CSK | Batsman | 63 | 0 | 79 | 45.93 | 0 | 0.0 | 0 | 1317 | 71 | 32.93 | 120.71 | 28 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2011 | 100000 | 800000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 124 | 125 | White, CL | 2 | AUS | RCB+ | Batsman | 146 | 5 | 2037 | 80.48 | 12 | 27.5 | 1 | 745 | 78 | 31.04 | 132.09 | 29 | 70 | 0 | 0.00 | 14.00 | 0.00 | 2008 | 100000 | 500000 |
| 125 | 126 | Yadav, AS | 2 | IND | DC | Batsman | 0 | 0 | 0 | 0.00 | 0 | 0.0 | 0 | 49 | 16 | 9.80 | 125.64 | 2 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2010 | 50000 | 750000 |
| 126 | 127 | Younis Khan | 2 | PAK | RR | Batsman | 6398 | 7 | 6814 | 75.78 | 3 | 86.6 | 1 | 3 | 3 | 3.00 | 42.85 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 2008 | 225000 | 225000 |
| 128 | 129 | Zaheer Khan | 2 | IND | MI+ | Bowler | 1114 | 288 | 790 | 73.55 | 278 | 35.4 | 0 | 99 | 23 | 9.90 | 91.67 | 1 | 1783 | 65 | 27.43 | 7.75 | 21.26 | 2008 | 200000 | 450000 |
| 129 | 130 | Zoysa, DNT | 2 | SL | DC | Bowler | 288 | 64 | 343 | 95.81 | 108 | 39.4 | 0 | 11 | 10 | 11.00 | 122.22 | 0 | 99 | 2 | 49.50 | 9.00 | 33.00 | 2008 | 100000 | 110000 |
126 rows × 26 columns
Players_name = df['PLAYER NAME']
Sixers = df['SIXERS']
Data = { 'Player_name': Players_name, 'Sixers':Sixers}
D = pd.DataFrame(Data)
D
| Player_name | Sixers | |
|---|---|---|
| 0 | Abdulla, YA | 0 |
| 1 | Abdur Razzak | 0 |
| 2 | Agarkar, AB | 5 |
| 3 | Ashwin, R | 0 |
| 4 | Badrinath, S | 28 |
| ... | ... | ... |
| 125 | Yadav, AS | 2 |
| 126 | Younis Khan | 0 |
| 127 | Yuvraj Singh | 67 |
| 128 | Zaheer Khan | 1 |
| 129 | Zoysa, DNT | 0 |
130 rows × 2 columns
Player_hit_more_than_80_sixers = D[(D["Player_name"] == 'Player_name') | (D["Sixers"] > 80)]
Player_hit_more_than_80_sixers
| Player_name | Sixers | |
|---|---|---|
| 26 | Gayle, CH | 129 |
| 28 | Gilchrist, AC | 86 |
| 82 | Pathan, YK | 81 |
| 88 | Raina, SK | 97 |
| 97 | Sharma, RG | 82 |